In [1]:
from time import time
import psycopg2
from collections import Counter
import gc
import pandas as pd
import numpy as np
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.metrics import classification_report
np.set_printoptions(suppress=True,precision=10)
from sklearn.model_selection import train_test_split
In [2]:
import sys
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression
from Matrix import convert_matrix
In [3]:
def selectMatrix(with_pearson):
    """Train and score the logistic regression on one word matrix.

    The matrix is obtained from ``convert_matrix(with_pearson=with_pearson,
    len_words=(3, 3))``. Its rows are split 80/20 into train and test sets
    (``random_state=42``) and handed to ``ajustLogisticRegression``.

    Parameters
    ----------
    with_pearson
        Forwarded unchanged to ``convert_matrix`` as ``with_pearson``.

    Returns
    -------
    tuple
        ``(survived_test, regresion, roc_auc)`` where ``survived_test`` is the
        list of level-1 index codes of the held-out rows, ``regresion`` is
        column 1 of ``model.predict_proba`` for those rows, and ``roc_auc`` is
        the third value returned by ``ajustLogisticRegression``.
    """
    table = convert_matrix(with_pearson=with_pearson, len_words=(3, 3))
    index = table.index

    # MultiIndex.labels was renamed to MultiIndex.codes in pandas 0.24 and the
    # old attribute was removed in 1.0; prefer the new name, fall back to the
    # old one so the notebook keeps working on legacy installations.
    level_codes = index.codes if hasattr(index, 'codes') else index.labels
    # Codes are row-aligned (one entry per row of table.values).
    survived = np.asarray(level_codes[1]).tolist()
    patients = table.values

    # The original also passed index.levels[0] through the split. That array
    # holds the *unique* level values, not one value per row, so it could
    # disagree in length/order with `patients`; its split halves were never
    # used. Leaving it out does not change which rows land in each half,
    # because the split indices depend only on the sample count and the seed.
    patients_train, patients_test, survived_train, survived_test = train_test_split(
        patients, survived, test_size=0.2, random_state=42)

    model, _accuracy, roc_auc = ajustLogisticRegression(
        patients_train, survived_train, patients_test, survived_test)
    # Column 1 of predict_proba: probability assigned to the second class.
    regresion = model.predict_proba(patients_test)[:, 1]
    return survived_test, regresion, roc_auc
In [27]:
# Collect one result record per value passed to selectMatrix; each record
# keeps the held-out labels, the predicted scores and the reported AUC so the
# curves can be drawn later without refitting.
nmfresults = []
for i in (1, 42, 84, 168, 252, 294, 336):
    survived_test, regresion, roc_auc = selectMatrix(i)
    nmfresults.append(dict(
        name=i,
        survived_test=survived_test,
        regresion=regresion,
        roc_auc=roc_auc,
    ))
In [28]:
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
In [29]:
def roc_curveplot(name, y_true, y_score, logit_roc_auc):
    """Draw one ROC curve on the current matplotlib axes.

    Parameters
    ----------
    name
        Identifier shown in the legend after ``n= `` (converted with ``str``).
    y_true
        True labels, passed to ``roc_curve``.
    y_score
        Predicted scores, passed to ``roc_curve``.
    logit_roc_auc
        AUC value printed in the legend with three decimals; it is only
        displayed, not recomputed here.
    """
    # roc_curve also returns the thresholds; they are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_score)
    auc_text = ' AUC =%0.3f' % logit_roc_auc
    legend_entry = 'n= ' + str(name) + auc_text
    plt.plot(false_pos_rate, true_pos_rate, label=legend_entry)
In [30]:
# Overlay the ROC curve of every collected result on a single figure.
plt.figure()
for nmfresult in nmfresults:
    roc_curveplot(
        name=nmfresult['name'],
        y_true=nmfresult['survived_test'],
        y_score=nmfresult['regresion'],
        logit_roc_auc=nmfresult['roc_auc'],
    )
# Dashed black diagonal from (0, 0) to (1, 1) as a visual reference.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim(0.0, 1.0)
# Upper y limit sits slightly above 1 so curves touching 1.0 stay visible.
plt.ylim(0.0, 1.05)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()
In [ ]: